<!DOCTYPE html>
<HTML lang = "en">
<HEAD>
  <meta charset="UTF-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
  <title>Code Profiling and Optimization</title>
  

  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      tex2jax: {inlineMath: [['$','$'], ['\\(','\\)']]},
      TeX: { equationNumbers: { autoNumber: "AMS" } }
    });
  </script>

  <script type="text/javascript" async src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
  </script>

  
<style>
pre.hljl {
    border: 1px solid #ccc;
    margin: 5px;
    padding: 5px;
    overflow-x: auto;
    color: rgb(68,68,68); background-color: rgb(251,251,251); }
pre.hljl > span.hljl-t { }
pre.hljl > span.hljl-w { }
pre.hljl > span.hljl-e { }
pre.hljl > span.hljl-eB { }
pre.hljl > span.hljl-o { }
pre.hljl > span.hljl-k { color: rgb(148,91,176); font-weight: bold; }
pre.hljl > span.hljl-kc { color: rgb(59,151,46); font-style: italic; }
pre.hljl > span.hljl-kd { color: rgb(214,102,97); font-style: italic; }
pre.hljl > span.hljl-kn { color: rgb(148,91,176); font-weight: bold; }
pre.hljl > span.hljl-kp { color: rgb(148,91,176); font-weight: bold; }
pre.hljl > span.hljl-kr { color: rgb(148,91,176); font-weight: bold; }
pre.hljl > span.hljl-kt { color: rgb(148,91,176); font-weight: bold; }
pre.hljl > span.hljl-n { }
pre.hljl > span.hljl-na { }
pre.hljl > span.hljl-nb { }
pre.hljl > span.hljl-nbp { }
pre.hljl > span.hljl-nc { }
pre.hljl > span.hljl-ncB { }
pre.hljl > span.hljl-nd { color: rgb(214,102,97); }
pre.hljl > span.hljl-ne { }
pre.hljl > span.hljl-neB { }
pre.hljl > span.hljl-nf { color: rgb(66,102,213); }
pre.hljl > span.hljl-nfm { color: rgb(66,102,213); }
pre.hljl > span.hljl-np { }
pre.hljl > span.hljl-nl { }
pre.hljl > span.hljl-nn { }
pre.hljl > span.hljl-no { }
pre.hljl > span.hljl-nt { }
pre.hljl > span.hljl-nv { }
pre.hljl > span.hljl-nvc { }
pre.hljl > span.hljl-nvg { }
pre.hljl > span.hljl-nvi { }
pre.hljl > span.hljl-nvm { }
pre.hljl > span.hljl-l { }
pre.hljl > span.hljl-ld { color: rgb(148,91,176); font-style: italic; }
pre.hljl > span.hljl-s { color: rgb(201,61,57); }
pre.hljl > span.hljl-sa { color: rgb(201,61,57); }
pre.hljl > span.hljl-sb { color: rgb(201,61,57); }
pre.hljl > span.hljl-sc { color: rgb(201,61,57); }
pre.hljl > span.hljl-sd { color: rgb(201,61,57); }
pre.hljl > span.hljl-sdB { color: rgb(201,61,57); }
pre.hljl > span.hljl-sdC { color: rgb(201,61,57); }
pre.hljl > span.hljl-se { color: rgb(59,151,46); }
pre.hljl > span.hljl-sh { color: rgb(201,61,57); }
pre.hljl > span.hljl-si { }
pre.hljl > span.hljl-so { color: rgb(201,61,57); }
pre.hljl > span.hljl-sr { color: rgb(201,61,57); }
pre.hljl > span.hljl-ss { color: rgb(201,61,57); }
pre.hljl > span.hljl-ssB { color: rgb(201,61,57); }
pre.hljl > span.hljl-nB { color: rgb(59,151,46); }
pre.hljl > span.hljl-nbB { color: rgb(59,151,46); }
pre.hljl > span.hljl-nfB { color: rgb(59,151,46); }
pre.hljl > span.hljl-nh { color: rgb(59,151,46); }
pre.hljl > span.hljl-ni { color: rgb(59,151,46); }
pre.hljl > span.hljl-nil { color: rgb(59,151,46); }
pre.hljl > span.hljl-noB { color: rgb(59,151,46); }
pre.hljl > span.hljl-oB { color: rgb(102,102,102); font-weight: bold; }
pre.hljl > span.hljl-ow { color: rgb(102,102,102); font-weight: bold; }
pre.hljl > span.hljl-p { }
pre.hljl > span.hljl-c { color: rgb(153,153,119); font-style: italic; }
pre.hljl > span.hljl-ch { color: rgb(153,153,119); font-style: italic; }
pre.hljl > span.hljl-cm { color: rgb(153,153,119); font-style: italic; }
pre.hljl > span.hljl-cp { color: rgb(153,153,119); font-style: italic; }
pre.hljl > span.hljl-cpB { color: rgb(153,153,119); font-style: italic; }
pre.hljl > span.hljl-cs { color: rgb(153,153,119); font-style: italic; }
pre.hljl > span.hljl-csB { color: rgb(153,153,119); font-style: italic; }
pre.hljl > span.hljl-g { }
pre.hljl > span.hljl-gd { }
pre.hljl > span.hljl-ge { }
pre.hljl > span.hljl-geB { }
pre.hljl > span.hljl-gh { }
pre.hljl > span.hljl-gi { }
pre.hljl > span.hljl-go { }
pre.hljl > span.hljl-gp { }
pre.hljl > span.hljl-gs { }
pre.hljl > span.hljl-gsB { }
pre.hljl > span.hljl-gt { }
</style>



  <style type="text/css">
  @font-face {
  font-style: normal;
  font-weight: 300;
}
@font-face {
  font-style: normal;
  font-weight: 400;
}
@font-face {
  font-style: normal;
  font-weight: 600;
}
html {
  font-family: sans-serif; /* 1 */
  -ms-text-size-adjust: 100%; /* 2 */
  -webkit-text-size-adjust: 100%; /* 2 */
}
body {
  margin: 0;
}
article,
aside,
details,
figcaption,
figure,
footer,
header,
hgroup,
main,
menu,
nav,
section,
summary {
  display: block;
}
audio,
canvas,
progress,
video {
  display: inline-block; /* 1 */
  vertical-align: baseline; /* 2 */
}
audio:not([controls]) {
  display: none;
  height: 0;
}
[hidden],
template {
  display: none;
}
a:active,
a:hover {
  outline: 0;
}
abbr[title] {
  border-bottom: 1px dotted;
}
b,
strong {
  font-weight: bold;
}
dfn {
  font-style: italic;
}
h1 {
  font-size: 2em;
  margin: 0.67em 0;
}
mark {
  background: #ff0;
  color: #000;
}
small {
  font-size: 80%;
}
sub,
sup {
  font-size: 75%;
  line-height: 0;
  position: relative;
  vertical-align: baseline;
}
sup {
  top: -0.5em;
}
sub {
  bottom: -0.25em;
}
img {
  border: 0;
}
svg:not(:root) {
  overflow: hidden;
}
figure {
  margin: 1em 40px;
}
hr {
  -moz-box-sizing: content-box;
  box-sizing: content-box;
  height: 0;
}
pre {
  overflow: auto;
}
code,
kbd,
pre,
samp {
  font-family: monospace, monospace;
  font-size: 1em;
}
button,
input,
optgroup,
select,
textarea {
  color: inherit; /* 1 */
  font: inherit; /* 2 */
  margin: 0; /* 3 */
}
button {
  overflow: visible;
}
button,
select {
  text-transform: none;
}
button,
html input[type="button"], /* 1 */
input[type="reset"],
input[type="submit"] {
  -webkit-appearance: button; /* 2 */
  cursor: pointer; /* 3 */
}
button[disabled],
html input[disabled] {
  cursor: default;
}
button::-moz-focus-inner,
input::-moz-focus-inner {
  border: 0;
  padding: 0;
}
input {
  line-height: normal;
}
input[type="checkbox"],
input[type="radio"] {
  box-sizing: border-box; /* 1 */
  padding: 0; /* 2 */
}
input[type="number"]::-webkit-inner-spin-button,
input[type="number"]::-webkit-outer-spin-button {
  height: auto;
}
input[type="search"] {
  -webkit-appearance: textfield; /* 1 */
  -moz-box-sizing: content-box;
  -webkit-box-sizing: content-box; /* 2 */
  box-sizing: content-box;
}
input[type="search"]::-webkit-search-cancel-button,
input[type="search"]::-webkit-search-decoration {
  -webkit-appearance: none;
}
fieldset {
  border: 1px solid #c0c0c0;
  margin: 0 2px;
  padding: 0.35em 0.625em 0.75em;
}
legend {
  border: 0; /* 1 */
  padding: 0; /* 2 */
}
textarea {
  overflow: auto;
}
optgroup {
  font-weight: bold;
}
table {
  font-family: monospace, monospace;
  font-size : 0.8em;
  border-collapse: collapse;
  border-spacing: 0;
}
td,
th {
  padding: 0;
}
thead th {
    border-bottom: 1px solid black;
    background-color: white;
}
tr:nth-child(odd){
  background-color: rgb(248,248,248);
}


/*
* Skeleton V2.0.4
* Copyright 2014, Dave Gamache
* www.getskeleton.com
* Free to use under the MIT license.
* http://www.opensource.org/licenses/mit-license.php
* 12/29/2014
*/
.container {
  position: relative;
  width: 100%;
  max-width: 960px;
  margin: 0 auto;
  padding: 0 20px;
  box-sizing: border-box; }
.column,
.columns {
  width: 100%;
  float: left;
  box-sizing: border-box; }
@media (min-width: 400px) {
  .container {
    width: 85%;
    padding: 0; }
}
@media (min-width: 550px) {
  .container {
    width: 80%; }
  .column,
  .columns {
    margin-left: 4%; }
  .column:first-child,
  .columns:first-child {
    margin-left: 0; }

  .one.column,
  .one.columns                    { width: 4.66666666667%; }
  .two.columns                    { width: 13.3333333333%; }
  .three.columns                  { width: 22%;            }
  .four.columns                   { width: 30.6666666667%; }
  .five.columns                   { width: 39.3333333333%; }
  .six.columns                    { width: 48%;            }
  .seven.columns                  { width: 56.6666666667%; }
  .eight.columns                  { width: 65.3333333333%; }
  .nine.columns                   { width: 74.0%;          }
  .ten.columns                    { width: 82.6666666667%; }
  .eleven.columns                 { width: 91.3333333333%; }
  .twelve.columns                 { width: 100%; margin-left: 0; }

  .one-third.column               { width: 30.6666666667%; }
  .two-thirds.column              { width: 65.3333333333%; }

  .one-half.column                { width: 48%; }

  /* Offsets */
  .offset-by-one.column,
  .offset-by-one.columns          { margin-left: 8.66666666667%; }
  .offset-by-two.column,
  .offset-by-two.columns          { margin-left: 17.3333333333%; }
  .offset-by-three.column,
  .offset-by-three.columns        { margin-left: 26%;            }
  .offset-by-four.column,
  .offset-by-four.columns         { margin-left: 34.6666666667%; }
  .offset-by-five.column,
  .offset-by-five.columns         { margin-left: 43.3333333333%; }
  .offset-by-six.column,
  .offset-by-six.columns          { margin-left: 52%;            }
  .offset-by-seven.column,
  .offset-by-seven.columns        { margin-left: 60.6666666667%; }
  .offset-by-eight.column,
  .offset-by-eight.columns        { margin-left: 69.3333333333%; }
  .offset-by-nine.column,
  .offset-by-nine.columns         { margin-left: 78.0%;          }
  .offset-by-ten.column,
  .offset-by-ten.columns          { margin-left: 86.6666666667%; }
  .offset-by-eleven.column,
  .offset-by-eleven.columns       { margin-left: 95.3333333333%; }

  .offset-by-one-third.column,
  .offset-by-one-third.columns    { margin-left: 34.6666666667%; }
  .offset-by-two-thirds.column,
  .offset-by-two-thirds.columns   { margin-left: 69.3333333333%; }

  .offset-by-one-half.column,
  .offset-by-one-half.columns     { margin-left: 52%; }

}
html {
  font-size: 62.5%; }
body {
  font-size: 1.5em; /* currently ems cause chrome bug misinterpreting rems on body element */
  line-height: 1.6;
  font-weight: 400;
  font-family: "Raleway", "HelveticaNeue", "Helvetica Neue", Helvetica, Arial, sans-serif;
  color: #222; }
h1, h2, h3, h4, h5, h6 {
  margin-top: 0;
  margin-bottom: 2rem;
  font-weight: 300; }
h1 { font-size: 3.6rem; line-height: 1.2;  letter-spacing: -.1rem;}
h2 { font-size: 3.4rem; line-height: 1.25; letter-spacing: -.1rem; }
h3 { font-size: 3.2rem; line-height: 1.3;  letter-spacing: -.1rem; }
h4 { font-size: 2.8rem; line-height: 1.35; letter-spacing: -.08rem; }
h5 { font-size: 2.4rem; line-height: 1.5;  letter-spacing: -.05rem; }
h6 { font-size: 1.5rem; line-height: 1.6;  letter-spacing: 0; }

p {
  margin-top: 0; }
a {
  color: #1EAEDB; }
a:hover {
  color: #0FA0CE; }
.button,
button,
input[type="submit"],
input[type="reset"],
input[type="button"] {
  display: inline-block;
  height: 38px;
  padding: 0 30px;
  color: #555;
  text-align: center;
  font-size: 11px;
  font-weight: 600;
  line-height: 38px;
  letter-spacing: .1rem;
  text-transform: uppercase;
  text-decoration: none;
  white-space: nowrap;
  background-color: transparent;
  border-radius: 4px;
  border: 1px solid #bbb;
  cursor: pointer;
  box-sizing: border-box; }
.button:hover,
button:hover,
input[type="submit"]:hover,
input[type="reset"]:hover,
input[type="button"]:hover,
.button:focus,
button:focus,
input[type="submit"]:focus,
input[type="reset"]:focus,
input[type="button"]:focus {
  color: #333;
  border-color: #888;
  outline: 0; }
.button.button-primary,
button.button-primary,
input[type="submit"].button-primary,
input[type="reset"].button-primary,
input[type="button"].button-primary {
  color: #FFF;
  background-color: #33C3F0;
  border-color: #33C3F0; }
.button.button-primary:hover,
button.button-primary:hover,
input[type="submit"].button-primary:hover,
input[type="reset"].button-primary:hover,
input[type="button"].button-primary:hover,
.button.button-primary:focus,
button.button-primary:focus,
input[type="submit"].button-primary:focus,
input[type="reset"].button-primary:focus,
input[type="button"].button-primary:focus {
  color: #FFF;
  background-color: #1EAEDB;
  border-color: #1EAEDB; }
input[type="email"],
input[type="number"],
input[type="search"],
input[type="text"],
input[type="tel"],
input[type="url"],
input[type="password"],
textarea,
select {
  height: 38px;
  padding: 6px 10px; /* The 6px vertically centers text on FF, ignored by Webkit */
  background-color: #fff;
  border: 1px solid #D1D1D1;
  border-radius: 4px;
  box-shadow: none;
  box-sizing: border-box; }
/* Removes awkward default styles on some inputs for iOS */
input[type="email"],
input[type="number"],
input[type="search"],
input[type="text"],
input[type="tel"],
input[type="url"],
input[type="password"],
textarea {
  -webkit-appearance: none;
     -moz-appearance: none;
          appearance: none; }
textarea {
  min-height: 65px;
  padding-top: 6px;
  padding-bottom: 6px; }
input[type="email"]:focus,
input[type="number"]:focus,
input[type="search"]:focus,
input[type="text"]:focus,
input[type="tel"]:focus,
input[type="url"]:focus,
input[type="password"]:focus,
textarea:focus,
select:focus {
  border: 1px solid #33C3F0;
  outline: 0; }
label,
legend {
  display: block;
  margin-bottom: .5rem;
  font-weight: 600; }
fieldset {
  padding: 0;
  border-width: 0; }
input[type="checkbox"],
input[type="radio"] {
  display: inline; }
label > .label-body {
  display: inline-block;
  margin-left: .5rem;
  font-weight: normal; }
ul {
  list-style: circle; }
ol {
  list-style: decimal; }
ul ul,
ul ol,
ol ol,
ol ul {
  margin: 1.5rem 0 1.5rem 3rem;
  font-size: 90%; }
li > p {margin : 0;}
th,
td {
  padding: 12px 15px;
  text-align: left;
  border-bottom: 1px solid #E1E1E1; }
th:first-child,
td:first-child {
  padding-left: 0; }
th:last-child,
td:last-child {
  padding-right: 0; }
button,
.button {
  margin-bottom: 1rem; }
input,
textarea,
select,
fieldset {
  margin-bottom: 1.5rem; }
pre,
blockquote,
dl,
figure,
table,
p,
ul,
ol,
form {
  margin-bottom: 1.0rem; }
.u-full-width {
  width: 100%;
  box-sizing: border-box; }
.u-max-full-width {
  max-width: 100%;
  box-sizing: border-box; }
.u-pull-right {
  float: right; }
.u-pull-left {
  float: left; }
hr {
  margin-top: 3rem;
  margin-bottom: 3.5rem;
  border-width: 0;
  border-top: 1px solid #E1E1E1; }
.container:after,
.row:after,
.u-cf {
  content: "";
  display: table;
  clear: both; }

pre {
  display: block;
  padding: 9.5px;
  margin: 0 0 10px;
  font-size: 13px;
  line-height: 1.42857143;
  word-break: break-all;
  word-wrap: break-word;
  border: 1px solid #ccc;
  border-radius: 4px;
}

pre.hljl {
  margin: 0 0 10px;
  display: block;
  background: #f5f5f5;
  border-radius: 4px;
  padding : 5px;
}

pre.output {
  background: #ffffff;
}

pre.code {
  background: #ffffff;
}

pre.julia-error {
  color : red
}

code,
kbd,
pre,
samp {
  font-family: Menlo, Monaco, Consolas, "Courier New", monospace;
  font-size: 0.9em;
}


@media (min-width: 400px) {}
@media (min-width: 550px) {}
@media (min-width: 750px) {}
@media (min-width: 1000px) {}
@media (min-width: 1200px) {}

h1.title {margin-top : 20px}
img {max-width : 100%}
div.title {text-align: center;}

  </style>
</HEAD>

<BODY>
  <div class ="container">
    <div class = "row">
      <div class = "col-md-12 twelve columns">
        <div class="title">
          <h1 class="title">Code Profiling and Optimization</h1>
          <h5>Chris Rackauckas</h5>
          <h5>December 11th, 2020</h5>
        </div>

        <p>This is just a quick look into code profiling. By now we should be writing high performance parallel code which is combining machine learning and scientific computing techniques and doing large-scale parameter analyses on the models. However, at this point it may be difficult to understand where our performance difficulties lie. This is where we turn to code profiling tooling.</p>
<h2>Type Inference Checking</h2>
<p>The most common way for code to slow down is via type-inference issues. One can normally work through them by &quot;thinking like a compiler&quot; and seeing what would be inferable. For example, a common issue is to not concretely type one&#39;s types. For example:</p>


<pre class='hljl'>
<span class='hljl-k'>struct</span><span class='hljl-t'> </span><span class='hljl-n'>MyStruct</span><span class='hljl-t'>
  </span><span class='hljl-n'>a</span><span class='hljl-oB'>::</span><span class='hljl-n'>AbstractArray</span><span class='hljl-t'>
</span><span class='hljl-k'>end</span><span class='hljl-t'>
</span><span class='hljl-n'>x</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-nf'>MyStruct</span><span class='hljl-p'>([</span><span class='hljl-ni'>1</span><span class='hljl-p'>,</span><span class='hljl-ni'>2</span><span class='hljl-p'>,</span><span class='hljl-ni'>3</span><span class='hljl-p'>])</span><span class='hljl-t'>
</span><span class='hljl-k'>function</span><span class='hljl-t'> </span><span class='hljl-nf'>f</span><span class='hljl-p'>(</span><span class='hljl-n'>x</span><span class='hljl-p'>)</span><span class='hljl-t'>
  </span><span class='hljl-n'>x</span><span class='hljl-oB'>.</span><span class='hljl-n'>a</span><span class='hljl-p'>[</span><span class='hljl-ni'>1</span><span class='hljl-p'>]</span><span class='hljl-t'>
</span><span class='hljl-k'>end</span><span class='hljl-t'>
</span><span class='hljl-k'>using</span><span class='hljl-t'> </span><span class='hljl-n'>InteractiveUtils</span><span class='hljl-t'>
</span><span class='hljl-nd'>@code_warntype</span><span class='hljl-t'> </span><span class='hljl-nf'>f</span><span class='hljl-p'>(</span><span class='hljl-n'>x</span><span class='hljl-p'>)</span>
</pre>


<pre class="output">
Variables
  #self#::Core.Compiler.Const&#40;Main.##WeaveSandBox#491.f, false&#41;
  x::Main.##WeaveSandBox#491.MyStruct

Body::ANY
1 ─ &#37;1 &#61; Base.getproperty&#40;x, :a&#41;::ABSTRACTARRAY
│   &#37;2 &#61; Base.getindex&#40;&#37;1, 1&#41;::ANY
└──      return &#37;2
</pre>


<p>In this case, the return type is not inferred and using <code>MyStruct</code> will generate slow code. The reason for this is quite simple: <code>x.a</code> can only be inferred as <code>AbstractArray</code>, and thus the element type <code>x.a&#91;1&#93;</code> and the exact dispatch cannot be known until the function finds out at runtime what kind of array it is. As a result, the compiler throws the only thing it can: it puts <code>Any</code> as the inferred type and runs slow code.</p>
<p>We can instead utilize a concrete struct or use a parametric type to create a family of related structs:</p>


<pre class='hljl'>
<span class='hljl-k'>struct</span><span class='hljl-t'> </span><span class='hljl-nf'>MyStruct2</span><span class='hljl-p'>{</span><span class='hljl-n'>A</span><span class='hljl-t'> </span><span class='hljl-oB'>&lt;:</span><span class='hljl-t'> </span><span class='hljl-n'>AbstractArray</span><span class='hljl-p'>}</span><span class='hljl-t'>
  </span><span class='hljl-n'>a</span><span class='hljl-oB'>::</span><span class='hljl-n'>A</span><span class='hljl-t'>
</span><span class='hljl-k'>end</span><span class='hljl-t'>
</span><span class='hljl-n'>x2</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-nf'>MyStruct2</span><span class='hljl-p'>([</span><span class='hljl-ni'>1</span><span class='hljl-p'>,</span><span class='hljl-ni'>2</span><span class='hljl-p'>,</span><span class='hljl-ni'>3</span><span class='hljl-p'>])</span><span class='hljl-t'>
</span><span class='hljl-nd'>@code_warntype</span><span class='hljl-t'> </span><span class='hljl-nf'>f</span><span class='hljl-p'>(</span><span class='hljl-n'>x2</span><span class='hljl-p'>)</span>
</pre>


<pre class="output">
Variables
  #self#::Core.Compiler.Const&#40;Main.##WeaveSandBox#491.f, false&#41;
  x::Main.##WeaveSandBox#491.MyStruct2&#123;Array&#123;Int64,1&#125;&#125;

Body::Int64
1 ─ &#37;1 &#61; Base.getproperty&#40;x, :a&#41;::Array&#123;Int64,1&#125;
│   &#37;2 &#61; Base.getindex&#40;&#37;1, 1&#41;::Int64
└──      return &#37;2
</pre>


<p>and now it&#39;s inferred because the information that it would need is inferrable.</p>
<p>But what if we needed help? The first tool of course is <code>@code_warntype</code>. But for deeper functions you may want more tooling. A nice tool is Traceur.jl which will alert you to the lines at which you have performance issues. In our example we see:</p>


<pre class='hljl'>
<span class='hljl-k'>using</span><span class='hljl-t'> </span><span class='hljl-n'>Traceur</span><span class='hljl-t'>
</span><span class='hljl-nd'>@trace</span><span class='hljl-t'> </span><span class='hljl-nf'>f</span><span class='hljl-p'>(</span><span class='hljl-n'>x</span><span class='hljl-p'>)</span>
</pre>


<pre><code>┌ Warning: dynamic dispatch to Base.getindex&#40;Base.getfield&#40;x, a&#41;, 1&#41;
└ @ none:-1
┌ Warning: f returns Any
└ @ none:2</code></pre>
<p>which points out our first problem is getting the untyped array out of the <code>MyStruct</code>. On larger functions it can do even more:</p>


<pre class='hljl'>
<span class='hljl-k'>function</span><span class='hljl-t'> </span><span class='hljl-nf'>naive_sum</span><span class='hljl-p'>(</span><span class='hljl-n'>xs</span><span class='hljl-p'>)</span><span class='hljl-t'>
  </span><span class='hljl-n'>s</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-ni'>0</span><span class='hljl-t'>
  </span><span class='hljl-k'>for</span><span class='hljl-t'> </span><span class='hljl-n'>x</span><span class='hljl-t'> </span><span class='hljl-kp'>in</span><span class='hljl-t'> </span><span class='hljl-n'>xs</span><span class='hljl-t'>
    </span><span class='hljl-n'>s</span><span class='hljl-t'> </span><span class='hljl-oB'>+=</span><span class='hljl-t'> </span><span class='hljl-n'>x</span><span class='hljl-t'>
  </span><span class='hljl-k'>end</span><span class='hljl-t'>
  </span><span class='hljl-k'>return</span><span class='hljl-t'> </span><span class='hljl-n'>s</span><span class='hljl-t'>
</span><span class='hljl-k'>end</span><span class='hljl-t'>
</span><span class='hljl-nd'>@trace</span><span class='hljl-t'> </span><span class='hljl-nf'>naive_sum</span><span class='hljl-p'>([</span><span class='hljl-nfB'>1.</span><span class='hljl-p'>])</span>
</pre>


<pre><code>┌ Warning:  is assigned as Tuple&#123;Int64,Int64&#125;
└ @ array.jl:-1
┌ Warning:  is assigned as Nothing
└ @ array.jl:-1
┌ Warning:  is assigned as Union&#123;Nothing, Tuple&#123;Float64,Int64&#125;&#125;
└ @ none:-1
┌ Warning:  is assigned as Union&#123;Nothing, Tuple&#123;Float64,Int64&#125;&#125;
└ @ none:-1
┌ Warning: s is assigned as Int64
└ @ none:-1
┌ Warning: s is assigned as Float64
└ @ none:-1
┌ Warning: naive_sum returns Union&#123;Float64, Int64&#125;
└ @ none:2</code></pre>
<p>and alert you to multiple lines which are causing problems.</p>
<p>However, for even larger functions you can still have many issues that are hard to dig into with Julia a linear tool. For thus, Cthulhu.jl&#39;s <code>@descend</code> macro lets you interactively dig into the function to find the problematic lines. For the best introduction, watch <a href="https://www.youtube.com/watch?v&#61;qf9oA09wxXY">Valentin Churavy&#39;s JuliaCon 2019 talk</a></p>
<h2>Flame Graphs</h2>
<p>Flame graphs are a common tool for illustrating performance. To demonstrate this let&#39;s look at the solution to an ODE from DifferentialEquations.jl&#39;s OrdinaryDiffEq.jl. The code is the following:</p>


<pre class='hljl'>
<span class='hljl-k'>using</span><span class='hljl-t'> </span><span class='hljl-n'>OrdinaryDiffEq</span><span class='hljl-t'>
</span><span class='hljl-k'>function</span><span class='hljl-t'> </span><span class='hljl-nf'>lorenz</span><span class='hljl-p'>(</span><span class='hljl-n'>du</span><span class='hljl-p'>,</span><span class='hljl-n'>u</span><span class='hljl-p'>,</span><span class='hljl-n'>p</span><span class='hljl-p'>,</span><span class='hljl-n'>t</span><span class='hljl-p'>)</span><span class='hljl-t'>
 </span><span class='hljl-n'>du</span><span class='hljl-p'>[</span><span class='hljl-ni'>1</span><span class='hljl-p'>]</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-nfB'>10.0</span><span class='hljl-p'>(</span><span class='hljl-n'>u</span><span class='hljl-p'>[</span><span class='hljl-ni'>2</span><span class='hljl-p'>]</span><span class='hljl-oB'>-</span><span class='hljl-n'>u</span><span class='hljl-p'>[</span><span class='hljl-ni'>1</span><span class='hljl-p'>])</span><span class='hljl-t'>
 </span><span class='hljl-n'>du</span><span class='hljl-p'>[</span><span class='hljl-ni'>2</span><span class='hljl-p'>]</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-n'>u</span><span class='hljl-p'>[</span><span class='hljl-ni'>1</span><span class='hljl-p'>]</span><span class='hljl-oB'>*</span><span class='hljl-p'>(</span><span class='hljl-nfB'>28.0</span><span class='hljl-oB'>-</span><span class='hljl-n'>u</span><span class='hljl-p'>[</span><span class='hljl-ni'>3</span><span class='hljl-p'>])</span><span class='hljl-t'> </span><span class='hljl-oB'>-</span><span class='hljl-t'> </span><span class='hljl-n'>u</span><span class='hljl-p'>[</span><span class='hljl-ni'>2</span><span class='hljl-p'>]</span><span class='hljl-t'>
 </span><span class='hljl-n'>du</span><span class='hljl-p'>[</span><span class='hljl-ni'>3</span><span class='hljl-p'>]</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-n'>u</span><span class='hljl-p'>[</span><span class='hljl-ni'>1</span><span class='hljl-p'>]</span><span class='hljl-oB'>*</span><span class='hljl-n'>u</span><span class='hljl-p'>[</span><span class='hljl-ni'>2</span><span class='hljl-p'>]</span><span class='hljl-t'> </span><span class='hljl-oB'>-</span><span class='hljl-t'> </span><span class='hljl-p'>(</span><span class='hljl-ni'>8</span><span class='hljl-oB'>/</span><span class='hljl-ni'>3</span><span class='hljl-p'>)</span><span class='hljl-oB'>*</span><span class='hljl-n'>u</span><span class='hljl-p'>[</span><span class='hljl-ni'>3</span><span class='hljl-p'>]</span><span class='hljl-t'>
</span><span class='hljl-k'>end</span><span class='hljl-t'>
</span><span class='hljl-n'>u0</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-p'>[</span><span class='hljl-nfB'>1.0</span><span class='hljl-p'>;</span><span class='hljl-nfB'>0.0</span><span class='hljl-p'>;</span><span class='hljl-nfB'>0.0</span><span class='hljl-p'>]</span><span class='hljl-t'>
</span><span class='hljl-n'>tspan</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-p'>(</span><span class='hljl-nfB'>0.0</span><span class='hljl-p'>,</span><span class='hljl-nfB'>100.0</span><span class='hljl-p'>)</span><span class='hljl-t'>
</span><span class='hljl-n'>prob</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-nf'>ODEProblem</span><span class='hljl-p'>(</span><span class='hljl-n'>lorenz</span><span class='hljl-p'>,</span><span class='hljl-n'>u0</span><span class='hljl-p'>,</span><span class='hljl-n'>tspan</span><span class='hljl-p'>)</span><span class='hljl-t'>
</span><span class='hljl-n'>sol</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-nf'>solve</span><span class='hljl-p'>(</span><span class='hljl-n'>prob</span><span class='hljl-p'>,</span><span class='hljl-nf'>Tsit5</span><span class='hljl-p'>())</span><span class='hljl-t'>
</span><span class='hljl-k'>using</span><span class='hljl-t'> </span><span class='hljl-n'>Plots</span><span class='hljl-p'>;</span><span class='hljl-t'> </span><span class='hljl-nf'>plot</span><span class='hljl-p'>(</span><span class='hljl-n'>sol</span><span class='hljl-p'>,</span><span class='hljl-n'>vars</span><span class='hljl-oB'>=</span><span class='hljl-p'>(</span><span class='hljl-ni'>1</span><span class='hljl-p'>,</span><span class='hljl-ni'>2</span><span class='hljl-p'>,</span><span class='hljl-ni'>3</span><span class='hljl-p'>))</span>
</pre>


<img src=""  />

<p>To generate the flame graph, first we want to create a profile. To do this we will use the Profile module&#39;s <code>@profile</code>. Note that a profile should be &quot;sufficiently large&quot;, so on quick functions you may want to run the code plenty of times. Make sure the profile does not include compilation if you want good results&#33;</p>


<pre class='hljl'>
<span class='hljl-cs'># No compilation in the results</span><span class='hljl-t'>
</span><span class='hljl-n'>sol</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-nf'>solve</span><span class='hljl-p'>(</span><span class='hljl-n'>prob</span><span class='hljl-p'>,</span><span class='hljl-nf'>Tsit5</span><span class='hljl-p'>())</span><span class='hljl-t'>

</span><span class='hljl-k'>using</span><span class='hljl-t'> </span><span class='hljl-n'>Profile</span><span class='hljl-t'>
</span><span class='hljl-cs'># Profile 1000 runs</span><span class='hljl-t'>
</span><span class='hljl-nd'>@profile</span><span class='hljl-t'> </span><span class='hljl-k'>for</span><span class='hljl-t'> </span><span class='hljl-n'>i</span><span class='hljl-t'> </span><span class='hljl-kp'>in</span><span class='hljl-t'> </span><span class='hljl-ni'>1</span><span class='hljl-oB'>:</span><span class='hljl-ni'>1000</span><span class='hljl-t'> </span><span class='hljl-n'>sol</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-nf'>solve</span><span class='hljl-p'>(</span><span class='hljl-n'>prob</span><span class='hljl-p'>,</span><span class='hljl-nf'>Tsit5</span><span class='hljl-p'>())</span><span class='hljl-t'> </span><span class='hljl-k'>end</span>
</pre>



<p>This profiler is a statistical or sampling profiler, which means it periodically samples where it is at in a code and thus understands the hotspots in the code by tallying how many samples are in a certain area. We can first visualize this by printing it out:</p>


<pre class='hljl'>
<span class='hljl-cs'>#Profile.print()</span>
</pre>



<p>However, that printout can often times be hard to read. Instead, we can visualize it with a <em>flame graph</em>. There are many ways to get the flame graph, if you&#39;re in Juno, you can simply do:</p>


<pre class='hljl'>
<span class='hljl-n'>Juno</span><span class='hljl-oB'>.</span><span class='hljl-nf'>profiler</span><span class='hljl-p'>()</span>
</pre>


<p><img src="https://user-images.githubusercontent.com/1814174/69931716-45633180-1496-11ea-888e-e7bcde939083.PNG" alt="" /></p>
<p>&#40;Note that if you&#39;re not using Juno, there are equivalent tools in the package ecosystem. ProfileView.jl is a very simple flame graph generator, and PProf.jl exports to Google PProf which has many more features&#41;</p>
<p>Each block corresponds to a function call. The horizontal length is the amount of time spent in that function, while the vertical grouping is for call nesting, i.e. you are below the function that called you. The portion that is circled is where the mouse pointer was at, and while hovering over this it said what function it corresponded to: <code>recursivecopy</code> in RecursiveArrayTools.jl. If we click on this, it sends us to the hotspot:</p>
<p><img src="https://user-images.githubusercontent.com/1814174/69931830-ad197c80-1496-11ea-80bd-4c0f134d120f.PNG" alt="" /></p>
<p>Juno gives you a light indicator that tells you how much time is spent at a given line. Here we see that most of the time is spent inside of the <code>map</code> operation which calls <code>recursivecopy</code> on the elements, which then does <code>copy&#40;a&#41;</code>.</p>
<p>This tells us that the main cost of our code is the part that is copying the arrays to save them&#33; Thus let&#39;s generate a new profile where the ODE solver saves less:</p>


<pre class='hljl'>
<span class='hljl-n'>Profile</span><span class='hljl-oB'>.</span><span class='hljl-nf'>clear</span><span class='hljl-p'>()</span><span class='hljl-t'>
</span><span class='hljl-cs'># No compilation in the results</span><span class='hljl-t'>
</span><span class='hljl-n'>sol</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-nf'>solve</span><span class='hljl-p'>(</span><span class='hljl-n'>prob</span><span class='hljl-p'>,</span><span class='hljl-nf'>Tsit5</span><span class='hljl-p'>(),</span><span class='hljl-n'>save_everystep</span><span class='hljl-oB'>=</span><span class='hljl-kc'>false</span><span class='hljl-p'>)</span><span class='hljl-t'>

</span><span class='hljl-cs'># Profile 1000 runs</span><span class='hljl-t'>
</span><span class='hljl-nd'>@profile</span><span class='hljl-t'> </span><span class='hljl-k'>for</span><span class='hljl-t'> </span><span class='hljl-n'>i</span><span class='hljl-t'> </span><span class='hljl-kp'>in</span><span class='hljl-t'> </span><span class='hljl-ni'>1</span><span class='hljl-oB'>:</span><span class='hljl-ni'>1000</span><span class='hljl-t'> </span><span class='hljl-n'>sol</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-nf'>solve</span><span class='hljl-p'>(</span><span class='hljl-n'>prob</span><span class='hljl-p'>,</span><span class='hljl-nf'>Tsit5</span><span class='hljl-p'>(),</span><span class='hljl-n'>save_everystep</span><span class='hljl-oB'>=</span><span class='hljl-kc'>false</span><span class='hljl-p'>)</span><span class='hljl-t'> </span><span class='hljl-k'>end</span>
</pre>




<pre class='hljl'>
<span class='hljl-n'>Juno</span><span class='hljl-oB'>.</span><span class='hljl-nf'>profiler</span><span class='hljl-p'>()</span>
</pre>


<p><img src="https://user-images.githubusercontent.com/1814174/69931923-08e40580-1497-11ea-9583-2d76af8316e4.PNG" alt="" /></p>
<p>Now we see that the majority of the time is spent in the <code>perform_step&#33;</code> method, which is:</p>
<p><img src="https://user-images.githubusercontent.com/1814174/69931943-2add8800-1497-11ea-94f8-b244dcac12f4.PNG" alt="" /></p>
<p>We can notice that there is still quite a bit of jitter in the profile since each of the <code>f</code> calls here should be exactly the same length, but upping the number of solves in the loop would help with that:</p>


<pre class='hljl'>
<span class='hljl-nd'>@profile</span><span class='hljl-t'> </span><span class='hljl-k'>for</span><span class='hljl-t'> </span><span class='hljl-n'>i</span><span class='hljl-t'> </span><span class='hljl-kp'>in</span><span class='hljl-t'> </span><span class='hljl-ni'>1</span><span class='hljl-oB'>:</span><span class='hljl-ni'>10000</span><span class='hljl-t'> </span><span class='hljl-n'>sol</span><span class='hljl-t'> </span><span class='hljl-oB'>=</span><span class='hljl-t'> </span><span class='hljl-nf'>solve</span><span class='hljl-p'>(</span><span class='hljl-n'>prob</span><span class='hljl-p'>,</span><span class='hljl-nf'>Tsit5</span><span class='hljl-p'>(),</span><span class='hljl-n'>save_everystep</span><span class='hljl-oB'>=</span><span class='hljl-kc'>false</span><span class='hljl-p'>)</span><span class='hljl-t'> </span><span class='hljl-k'>end</span>
</pre>




<pre class='hljl'>
<span class='hljl-n'>Juno</span><span class='hljl-oB'>.</span><span class='hljl-nf'>profiler</span><span class='hljl-p'>()</span>
</pre>


<p><img src="https://user-images.githubusercontent.com/1814174/69932011-755f0480-1497-11ea-8839-ecc8f5d7c9fc.PNG" alt="" /></p>
<p>Now that this looks like a fairly good profile, we can use this to dig in and find out what lines need to be optimized&#33;</p>


        <HR/>
        <div class="footer">
          <p>
            Published from <a href="code_profiling.jmd">code_profiling.jmd</a>
            using <a href="http://github.com/JunoLab/Weave.jl">Weave.jl</a> v0.10.6 on 2020-12-11.
          </p>
        </div>
      </div>
    </div>
  </div>
</BODY>

</HTML>
